# Origin: gorgonia.org/gorgonia@v0.9.17/cuda modules/compile.py
"""Compile every CUDA source in ``src/`` to PTX, once per compute capability.

For each ``src/*.cu`` file and each entry in ``compute`` the script runs nvcc
with the "slow" (precise floating-point) flag set and asks for the result to
be written to ``target/<name>_cc<capability>.ptx``.
"""
import glob
import os
import sys
from subprocess import call
from os.path import basename, splitext

# Compute capabilities (passed to nvcc as ``-arch=compute_<cc>``) to build for.
compute = [30, 32, 35, 37, 50, 52, 53, 60, 61, 62]

# Flags that every nvcc invocation shares, in the order they are passed.
BASE_FLAGS = ('-ptx', '-Xptxas', '-allow-expensive-optimizations')

# "slow" variant: the flag set main() actually builds with.
SLOW_FLAGS = ('-fmad=false', '-ftz=false', '-prec-div=true', '-prec-sqrt=true')

# "fast" variant: the original script assembled this flag set but never ran
# it. It is kept here so the information is not lost; main() does not use it.
FAST_FLAGS = ('-fmad=false', '-use_fast_math')


def nvcc_command(source, cc, flags=SLOW_FLAGS):
    """Return the nvcc argument list that compiles *source* for capability *cc*.

    Args:
        source: Path of the ``.cu`` file to compile.
        cc: Compute capability number, e.g. ``52``.
        flags: Extra nvcc flags placed after the shared ones (default: the
            "slow" flag set).

    Returns:
        A new list: ``nvcc``, the output option, the arch option, the shared
        flags, *flags*, and finally *source*.
    """
    name = splitext(basename(source))[0]
    # NOTE(review): the double quotes are part of the argument itself because
    # the command is run without a shell. Kept exactly as in the original
    # script -- confirm that nvcc treats them as intended.
    output = '-o="target/' + name + '_cc' + str(cc) + '.ptx"'
    arch = '-arch=compute_' + str(cc)
    return ['nvcc', output, arch] + list(BASE_FLAGS) + list(flags) + [source]


def main():
    """Build the "slow" PTX for every source/capability pair.

    Each command is printed before it is run. A non-zero exit status from
    nvcc does not stop the build; the failing pairs are listed on stderr once
    all invocations have finished. Returns ``None``.
    """
    # Sorted so the build log is reproducible; glob order is arbitrary.
    sources = sorted(glob.glob("src/*.cu"))
    if not sources:
        return

    # The output option points into target/; make sure it exists first.
    if not os.path.isdir("target"):
        os.makedirs("target")

    failed = []
    for source in sources:
        for cc in compute:
            cmd = nvcc_command(source, cc)
            print(cmd)
            if call(cmd) != 0:
                failed.append((source, cc))

    # Surface failures instead of silently dropping nvcc's exit status.
    for source, cc in failed:
        sys.stderr.write(
            "nvcc failed for {0} (compute_{1})\n".format(source, cc))


if __name__ == '__main__':
    main()